# S-Plus code: Example 9.9
# Coded by: ZONGWU CAI
                                   
#  "Functional-Coefficient Regression Models for Nonlinear Time Series". 
  
# THIS FILE IS FOR SEARCHING BEST MODEL FOR EXAMPLE 4, BASED ON PROCEDURE
# DESCRIBED IN SECTION 3.3, IN THE PAPER BY CAI, FAN AND YAO (2000, JASA).
# DOI: 10.2307/2669476.
#
# THE STRATEGY IS TO CONSIDER THE ORDER OF MODEL 2 <= p <= 11 AND TO FIND
# THE VARYING VARIABLE AT LAG d, 1<= d <= p.
# IN THIS PROGRAM, TWO FUNCTIONS ARE DEFINED::
# localfit(...)  FOR COMPUTING LOCAL LINEAR ESTIMATION
# bandwidth(...) FOR COMPUTING THE AMS-VALUES DEFINED IN (12) FOR CERTAIN 
#                RANGE OF BANDWIDTHS.


# TO RUN THIS PROGRAM, WHAT YOU NEED TO DO IS TO INPUT FOLLOWINGS:
  
# A.   You need to set following parameters according to your case 

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  pc <- 0      # (1) Set pc=1 if running in PC; otherwise, set pc to be 0.
  if(pc==1){datafile<-"c:\\ ... \\sunspot.dat"}else
                {datafile<-"sunspot.dat"}
               # (2) Set data file name. For instance: datafile="sunspot.dat".
  Q  <- -1     # (3) Set Q, the number of multi-folds; set Q=-1 for default. 
  h0 <- seq(1,5,by=0.05)    
               # (4) Set h0, a sequence of the possible values for bandwidths; set h0=-1 for default. 
  trans <- 1   # (5) Set trans=1, if do squared-root transformation on response.
               # Note: you need to modify the program if transformation is different.
  p.max <- 11  # (6) Set p.max, the largest order of model considered.
    
# B. Type ("far4-lag.s")
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# YOU MIGHT NOT NEED TO MODIFY THE FOLLOWING CODE IF YOU DO NOT 
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#++++++++++++++++++
# DEFINE FUNCTIONS
#++++++++++++++++++
 
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
kernel<-function(x){0.75*(1-x^2)*(abs(x)<=1)}  # DEFINE KERNEL FUNCTION
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  
  
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# DEFINE THE FUNCTION TO COMPUTE the LOCAL LINEAR ESTIMATE
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   
  localfit <- function(y,x,h,u,z){
  # y -- response variable; 
  # x -- design matrix; 
  # u -- varying variable;
  # h -- bandwidth; 
  # z -- grid point.
  
  ngrid   <- length(z)                        # number of grid points
  n       <- length(y)                        # number of data points
  p       <- dim(x)[2]                        # number of covariates
  ff      <- rep(0,ngrid*p)
  dim(ff) <- c(ngrid,p) 
  for (k in 1:ngrid){
    dx     <- cbind(x,(u-z[k])*x)
    w0     <- kernel((u-z[k])/h)/h
    s0     <- t(dx)%*%(w0*dx)
    s1     <- solve(s0+0.001*diag(2*p))
    beta   <- s1%*%(t(dx)%*%(w0*y))
    ff[k,] <- beta[1:p]
   }
  return(ff)
  }
  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  
  #+++++++++++++++++++++++++++++++++++++++++
  # DEFINE BANDWIDTH SELECTOR BASED ON (12)
  #+++++++++++++++++++++++++++++++++++++++++

  bandwidth <- function(y,x,h0,u,Q){
  # y - response; 
  # x - design matrix; 
  # u - varying variable; 
  # h0 - possible bandwidths; 
  # Q - value of folds
  
  n <- length(y)                          # sample size
  if(Q<=0){Q<-3}
  if(sum(h0)<=0){
    y.range <- max(y)-min(y)
    aa      <- n^{-0.2}
    h0      <- seq(0.4*y.range*aa,0.8*y.range*aa,by=0.2)
    # As default, set h0=c*n^{-0.2} with c from 0.4 to 0.8
  }
  
  m        <- floor(0.1*n)                # define m
  nh       <- length(h0)                  # number of possible bandwidths
  ams      <- rep(0,Q*nh)
  dim(ams) <- c(nh,Q)
  for(qq in 1:Q){
    n1 <- n-qq*m
    y0 <- y[1:n1]
    x0 <- x[1:n1,]
    u0 <- u[1:n1]
    z0 <- u[(n1+1):(n1+m)] 
    for(j in 1:nh){
      h         <- h0[j]*(n/n1)^0.2
      ff        <- localfit(y0,x0,h,u0,z0)
      yhat      <- apply(ff*x[(n1+1):(n1+m),],1,sum)
      ams[j,qq] <- mean((y[(n1+1):(n1+m)]-yhat)^2)}
    }
  ams <- apply(ams,1,sum)
  return(ams)
  }
  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  
  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  # READ DATA FROM DATA FILE. FOR EXAMPLE: sunspot data set
  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++

  data <- USUNEMP250                # matrix(scan(datafile),byrow=T,ncol=1)
  n    <- 250                           
  if(trans==1){
  data    <- 1*((data))}        
  AMS     <- rep(0,p.max-1)
  best.d  <- rep(0,p.max-1)
  order.p <- 2:p.max

  for(p in 2:p.max){                # start the model with two lags to p.max lags
    y      <- data[(p+1):n]  
    x      <- rep(0, p*(n-p))
    dim(x) <- c(n-p,p)
    for(j in 1:p){
      x[,j] <- data[(p-j+1):(n-j)]  # design matrix consisting of lagged variables
    }

    print(c("I am working on the model with p lags", p))
    aa <- rep(0,p)
    print("Please wait, I am working on searching for varying variable")

    for(d in 1:p){                   # start to search for varying variable
      u <- data[(p-d+1):(n-d)]       # set varying variable as lag d, 1<= d <= p
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

      #++++++++++++++++
      # DO COMPUTATION
      #++++++++++++++++

      ams1  <- bandwidth(y,x,h0,u,Q) # compute AMS for certain values of bandwidth
      aa[d] <- min(ams1)             # Find the smallest AMS for this model
    }    # end of the loop for d

   ind         <- order(aa)[1]       # locate the index of varying variable
   AMS[p-1]    <- aa[ind]            # the AMS value for the selected model
   best.d[p-1] <- ind
   data1       <- cbind(order.p,best.d,AMS)
   print(data1)
  }      # end of the loop for p

  print(("THE SEARCH RESULTS ARE AS FOLLOWS:"))
  print(data1)
  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

  #+++++++++++++++++++++
  print(c("I AM DONE"))
  #+++++++++++++++++++++
